---
output:
  bookdown::word_document2:
    reference_docx: "word-template.docx"
  # bookdown::pdf_document2:
    toc: no
    number_sections: no
    pandoc_args: !expr rmdfiltr::add_wordcount_filter(rmdfiltr::add_citeproc_filter(args = NULL))
  #   latex_engine: xelatex
  
always_allow_html: true
# header-includes:
#   - \usepackage{setspace}\doublespace
  # - \usepackage[nolists, fighead, tabhead]{endfloat}
  # - \usepackage{endnotes}
  # - \let\footnote=\endnote
#- \setlength{\headheight}{14.5pt}
#- \setlength{\headheight}{13.6pt}
  # - \usepackage{fancyhdr}
  # - \pagestyle{fancy}
  # - \lhead{N.I. Hoffmann}
  # - \rhead{`r format(Sys.time(), '%B %e, %Y')`}
# - \newcommand{\beginsupplement}{\setcounter{table}{0}  
# \renewcommand{\thetable}{A\arabic{table}} \setcounter{figure}{0} 
# \renewcommand{\thefigure}{A\arabic{figure}}}

editor_options: 
  chunk_output_type: console

citeproc: no
# fontfamily: mathpazo
# fontsize: 12pt
# geometry: margin=1in
# indent: yes
link-citations: yes
linkcolor: blue
lang: 'en-US'



title: "Strangers in the Homeland? The Academic Performance of U.S.-Born Children of Return Migrants in Mexico"


  
---




```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F, warning = F, message = F, cache = T, dpi = 800)
options("yaml.eval.expr" = TRUE)

# library(flextable)
library(patchwork)
library(srvyr)
library(patchwork)
library(sandwich)
library(lmtest)
library(estimatr)
library(here)
library(knitr)
library(optmatch)
library(cem)
library(broom)
library(sensemakr)
library(MatchIt)
library(haven)
library(tidyverse)

knit_hooks$set(inline = function(x) {
  prettyNum(x, big.mark=",")
})


options("yaml.eval.expr" = TRUE, scipen = 3, digits = 2)

uclablue = '#2774AE'
gray = '#808080'
black = '#000000'
ucla_palette = c(black, uclablue, gray)


theme_set(theme_classic(base_family = 'Palatino', base_size = 9) + 
            theme(axis.line = element_line(size = .3), 
                  axis.ticks = element_line(size = .3), 
                  legend.title=element_blank(), 
                  panel.grid.major.y = element_line('grey80', size = .3),
                  legend.background = element_rect(fill = "transparent")))

ggplot <- function(...) ggplot2::ggplot(...) + scale_color_brewer(palette="Dark2") +
  scale_fill_brewer(palette="Dark2")

 

```

```{r load, include = F}
pisa_full <- readRDS(here('pisa_return.rds')) %>%
  filter(year %in% 2012:2018) %>%
   filter(across(c(female,  mom_ed, dad_ed, early_ed, cultural_pos, home_ed, age),
                ~ !is.na(.x))) %>%
  mutate(school_id = paste0(year, school_id),
         village = school_location == 'Village',
         non_urban = ifelse(!is.na(school_location), school_location %in% c('Village', 'Small Town', 'Town'), NA))

pisa_mex <- pisa_full %>%
  filter(country == 'Mexico',
         birth_country %in% c('Mexico', 'United States of America'),
         mom_country == 'Mexico',
         dad_country == 'Mexico') %>%
  mutate(birth_country = ifelse(birth_country == 'United States of America', 'USA', as.character(birth_country)),
         treat = as.numeric(birth_country == 'USA'))

pisa_us <-  pisa_full %>%
  filter((country %in% c('United States', 'United States of America') & 
            birth_country == 'United States of America' &
            lang == 'Spanish' &
            mom_country == 'Another country (USA)' & 
            dad_country == 'Another country (USA)')) %>%
  mutate(country = 'USA',
         birth_country = 'USA')

pisa_mex_us <- bind_rows(
    filter(pisa_mex, birth_country == 'USA'),
    pisa_us
) %>%
  mutate(treat = as.numeric(country == 'Mexico'))

```

```{r functions}
rubin <- function(dataset, sample_label, covariates = NULL, pv_num = 5, 
                  treat = 'treat', weights = 'weight', cluster_var = 'school_id'){
  dataset = as.data.frame(dataset)
  dim_list <- list()
  method = ifelse(is.null(covariates), 'DIM', 'OLS')
  
  for(outcome in c('read', 'math', 'scie')){
    est_ols <- rep(NA, pv_num)
    var_ols <- rep(NA, pv_num)
    for(pv in 1:pv_num){
      if(is.null(covariates)){
        formula <- paste0(outcome, pv, ' ~ ', treat)
        } else{
          formula <- paste0(outcome, pv, ' ~ ', treat, ' + ', covariates)
        }
      lm_model <- lm(formula, data = dataset, weights = weight)
      est_ols[pv] <- tidy(lm_model)[[2,2]]
      var_ols[pv] <- vcovCL(lm_model, cluster = dataset[, cluster_var])[2,2]
    }
    est_out_ols <- mean(est_ols)
    var_samp_ols <- mean(var_ols)
    var_imp_ols <- sum((est_ols - est_out_ols)^2)/4
    se_out_ols <- sqrt(var_samp_ols + (1 + 1/5)*var_imp_ols)
    
    dim_list[[outcome]] <-
      data.frame(method = method,
                 sample = sample_label,
                 Outcome = outcome, 
                 Estimate = est_out_ols,
                 se = se_out_ols)
  }
  dim_list %>%
    bind_rows() %>%
    mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science')) %>%
    return()
}

moderator_p <- function(df, cat1, cat2){
  out_list <- list()
  for(outcome in unique(df$Outcome)){
    b1 <- with(df, Estimate[sample == cat1 & Outcome == outcome])
    se1 <- with(df, se[sample == cat1 & Outcome == outcome])
    b2 <- with(df, Estimate[sample == cat2 & Outcome == outcome])
    se2 <- with(df, se[sample == cat2 & Outcome == outcome]) 
    
    z <- (b1 - b2)/sqrt(se1^2 + se2^2)
    p <- round(2*(1 - pnorm(abs(as.numeric(z)))), 3)
    
    out_list[[outcome]] <- data.frame(Outcome = outcome, dif = b1-b2, z = z, p = p)
  }
  return(bind_rows(out_list))
}
  
  

rubin_means <- function(dataset, labels = c(0,1), pv_num = 5, 
                  treat = 'treat', weights = 'weight', cluster_id = 'school_id'){
  dataset <- as_survey_design(dataset, 
                              ids = cluster_id,
                              weights = weights)
  mean_list <- list()
  
  for(outcome in c('read', 'math', 'scie')){
    mean_treat0 <- rep(NA, pv_num)
    mean_treat1 <- rep(NA, pv_num)
    var_treat0 <- rep(NA, pv_num)
    var_treat1 <- rep(NA, pv_num)
    
    for(pv in 1:pv_num){
      
      summary_df <- dataset %>%
        group_by(treat) %>%
        summarize(mean = survey_mean(!!sym(paste0(outcome, pv))),
                  var = survey_var(!!sym(paste0(outcome, pv))))
      
      mean_treat0[pv] <- summary_df[[1,2]]
      mean_treat1[pv] <- summary_df[[2,2]]
      
      var_treat0[pv] <- summary_df[[1,4]]
      var_treat1[pv] <- summary_df[[2,4]]
      
    }
    mean_out0 <- mean(mean_treat0)
    var_samp0 <- mean(var_treat0)
    sd_out0 <- sqrt(var_samp0)
    
    mean_out1 <- mean(mean_treat1)
    var_samp1 <- mean(var_treat1)
    sd_out1 <- sqrt(var_samp1)
    
    mean_list[[outcome]] <-
      data.frame(Outcome = outcome, 
                 label = c(labels[1], labels[2]),
                 n = c(nrow(filter(dataset, treat==0)), nrow(filter(dataset, treat==1))),
                 Mean = c(mean_out0, mean_out1),
                 SD = c(sd_out0, sd_out1)
                 )
  }
  mean_list %>%
    bind_rows() %>%
    mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science')) %>%
    return()
}


stacked_bar <- function(data, x, fill){
  data %>%
    filter(!is.na(!!sym(x)) & !is.na(!!sym(fill))) %>%
    count(!!sym(x), !!sym(fill)) %>%
    ggplot(aes(x = !!sym(x), y = n, fill = !!sym(fill))) +
    geom_bar(position='fill', stat = 'identity') +
    theme(axis.text.x = element_text(angle = -30)) +
    labs(title = fill)
}

```



# Figures

```{r violin, fig.cap = 'Violin plots of the first plausible values of reading, math, and science scores for the sample of interest (U.S.-born children of return migrants in Mexico) and the two comparison groups (Mexican-born children to Mexican parents in Mexico and U.S.-born children to Spanish-speaking immigrants in the U.S.).'}
dodge = position_dodge(width = 1)

bind_rows(
  pisa_mex %>%
    select(Reading = read1, Math = math1, Science = scie1, 
           sample = birth_country) %>%
    mutate(sample = if_else(sample == 'USA', 
                            'A. U.S.-born in Mexico', 
                            'B. Mexican-born in Mexico')) %>%
    pivot_longer(-sample, names_to = 'subject', values_to = 'score') %>%
    group_by(subject, sample) %>%
    mutate(mean_score = mean(score),
           n = format(n(), big.mark = ',')) %>%
    ungroup() %>%
    mutate(sample = paste0(sample, ' (n = ', n, ')')),
  pisa_mex_us %>%
    filter(treat == 0) %>%
    select(Reading = read1, Math = math1, Science = scie1) %>%
    mutate(sample = 'C. Spanish-speaking in U.S.') %>%
    pivot_longer(-sample, names_to = 'subject', values_to = 'score') %>%
    group_by(subject) %>%
    mutate(mean_score = mean(score),
           n = format(n(), big.mark = ',')) %>%
    ungroup() %>%
    mutate(sample = paste0(sample, ' (n = ', n, ')'))) %>%
  mutate(subject = factor(subject, levels = c('Reading', 'Math', 'Science'))) %>%
  ggplot(aes(x = subject, y = score, color = subject)) +
  geom_violin(position = dodge, alpha = .2, 
              aes(fill = subject)) +
  geom_boxplot(width = .2, position = dodge) +
  facet_wrap(~sample) +
  labs(x = '', y = 'PISA score') +
  theme(legend.position = 'hide',
        text = element_text(size = 7))
```

\newpage

```{r dim-mex, results = 'hide', fig.height = 3.5, fig.cap = "Difference-in-means (DIM) and OLS estimates comparing children of return migrants in Mexico to children in Mexico. Error bars represent 95% asymptotic confidence intervals. OLS (pre-migration) models adjust for parents' education, cultural possessions, home educational resources, age, ECEC, gender, and survey year. OLS (post-migration) models additionally adjust for household wealth; home possessions; home ICT resources; an index of economic, social and cultural status; urban or non-urban locality; and highest parental ISEI. All models cluster standard errors at the school level and incorporate sampling weights."}

dim_fig_mex_us <-
  bind_rows(rubin(pisa_mex, 'DIM'),
          rubin(pisa_mex, 'OLS (pre-migration)', 'mom_ed + dad_ed + female + early_ed +cultural_pos + home_ed + age + year'),
          rubin(pisa_mex, 'OLS (pre- & post-migration)',
                covariates = 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + non_urban +
                wealth + home_pos + ict_res + escs + parent_isei')
) %>%
  mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science'),
            sample = factor(sample, 
                            levels = c('DIM', 'OLS (pre-migration)', 'OLS (pre- & post-migration)')))

dim_fig_mex_us %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = '', y = 'Estimated difference in PISA score') +
  theme(#axis.text.x=element_text(angle=-15),
        legend.justification=c(1,0), 
        legend.position=c(.4,.7))



```

\newpage

```{r mod-desc-mex, fig.cap = 'Sample distributions of gender, school location, and age at arrival. The first two panels compare the U.S.-born children of return migrants to Mexican-born locals, while the third panel shows the distribution only for the former. All estimates incorporate sampling weights.'}
plot_female <- pisa_mex %>%
  group_by(treat) %>%
  count(female, wt = weight) %>%
  mutate(treat = ifelse(treat == 1, "U.S.-born.", "Mexico"),
         female = ifelse(female == 1, 'Female', 'Male')) %>%
  ggplot(aes(x = treat, y = n, fill = female)) +
  geom_bar(position='fill', stat = 'identity') +
  geom_text(aes(label = female), size = 2, 
            position = position_fill(vjust = 0.5), color = 'white') +
  labs(x = '', y = 'Proportion', 
       title = 'A. Gender') + 
  theme(legend.position = "none")

plot_location <- pisa_mex %>%
  filter(!is.na(school_location)) %>%
  group_by(treat) %>%
  count(school_location, wt = weight) %>%
  mutate(treat = ifelse(treat == 1, "U.S.-born", "Mexico")) %>%
  ggplot(aes(x = treat, y = n, fill = school_location)) +
  geom_bar(position='fill', stat = 'identity') +
  geom_text(aes(label = school_location), size = 2,
            position = position_fill(vjust = 0.5), color = 'white') +
  labs(x = '', y = '', 
       title = 'B. School Location') + 
  theme(legend.position = "none")

age_arrival_df <- pisa_mex %>%
  filter(treat == 1, !is.na(age_arrival)) %>%
   count(age_arrival, wt = weight) %>%
  mutate(prop = n /sum(.[,'n']))

plot_age_arrival <- age_arrival_df %>%
  ggplot(aes(x = age_arrival, y = prop)) +
  geom_col(width = .99) +
  labs(x = '', y = '', 
       title = 'C. Age at Arrival')

plot_female + plot_location + plot_age_arrival & theme(text = element_text(size = 8))
```

\newpage

```{r mod-est-mex, fig.cap = 'Moderators of the difference in PISA scores between U.S.-born children of return migrants and Mexican-born adolescents. All estimates come from OLS models that adjust for the controls specified in the data section, cluster standard errors at the school level, and incorporate sampling weights.'}
controls = '+ mom_ed + dad_ed + female + early_ed + cultural_pos + home_ed + age + year'

mod_df_mex_us <- bind_rows(
  rubin(filter(pisa_mex, female == 1), 'Girls', covariates = controls),
  rubin(filter(pisa_mex, female == 0), 'Boys', covariates = controls),
  rubin(filter(pisa_mex, non_urban == 1), 'Non-urban', covariates = controls),
  rubin(filter(pisa_mex, non_urban == 0), 'Urban', covariates = controls),
  rubin(filter(pisa_mex, village == 1), 'Village', covariates = controls),
  rubin(filter(pisa_mex, village == 0), 'Non-village', covariates = controls),
  rubin(filter(pisa_mex, age_arrival < 5 | treat == 0), 'Under 5', covariates = controls),
  rubin(filter(pisa_mex, age_arrival >= 5 | treat == 0), '5 and older', covariates = controls),
  rubin(filter(pisa_mex, age_arrival < 10 | treat == 0), 'Under 10', covariates = controls),
  rubin(filter(pisa_mex, age_arrival >= 10 | treat == 0), '10 and older', covariates = controls)
)


mod_df_mex_us %>% 
  mutate(sample = factor(sample, levels = unique(mod_df_mex_us$sample))) %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, 
                    ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = '', y = 'Difference in scores for U.S.-born'
       #title = 'Across moderators, the 0.5 generation performs as well or better than local Mexican students'
       ) +
  theme(axis.text.x=element_text(angle=-15),
        legend.justification=c(0,0),
        legend.position=c(0.175,.02)) 

```

\newpage

```{r dim-us, results = 'hide', fig.height = 3.5, fig.cap = "Difference-in-means (DIM) and OLS estimates comparing children of return migrants in Mexico to children of Spanish-speaking immigrants in the U.S. Error bars represent 95% asymptotic confidence intervals. OLS (pre-migration) models adjust for parents' education, cultural possessions, home educational resources, age, ECEC, gender, and survey year. OLS (post-migration) models additionally adjust for household wealth; home possessions; home ICT resources; an index of economic, social and cultural status; urban or non-urban locality; and highest parental ISEI. All models cluster standard errors at the school level and incorporate sampling weights."}
dim_fig_us <-
  bind_rows(rubin(pisa_mex_us, 'DIM', weights = 'senate_weight'),
          rubin(pisa_mex_us, 'OLS (pre-migration)', 
                'mom_ed + dad_ed + female + early_ed +cultural_pos + home_ed + age + year',
                weights = 'senate_weight'),
          rubin(pisa_mex_us, 'OLS (pre- & post-migration)',
                covariates = 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + 
                wealth + home_pos + ict_res + escs + parent_isei', 
                weights = 'senate_weight')
          ) %>%
  mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science'),
         sample = factor(sample, 
                            levels = c('DIM', 'OLS (pre-migration)', 'OLS (pre- & post-migration)'))) 

dim_fig_us %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = 'Method', y = '') +
  theme(#axis.text.x=element_text(angle=-15),
        legend.justification=c(1,0), 
        legend.position=c(.45,.05))
```

\newpage

```{r mex-us-compare, fig.cap = 'Histograms comparing the family background of U.S.-born children of return migrants in Mexico to the children of Spanish-speaking immigrants in the U.S. in PISA 2012, 2015, and 2018. Estimates incorporate sampling weights.'}
plot_cat <- function(var_name, title){
  pisa_mex_us %>%
      mutate(treat = ifelse(treat == 1, 'U.S.-born in Mexico', 'U.S. Spanish-Speaking')) %>%
      filter(!is.na(!!sym(var_name))) %>%
      group_by(treat, !!sym(var_name)) %>%
      count(wt = senate_weight) %>%
      group_by(treat) %>%
      mutate(prop = n /sum(n)) %>%
      ggplot(aes(x = !!sym(var_name), y = prop, color = treat, fill = treat)) +
      geom_col(position = 'dodge') +
      labs(x = '', y = '', 
           title = title)
}

plot_cont <- function(var_name, title){
   pisa_mex_us %>%
      mutate(treat = ifelse(treat == 1, 'U.S.-born in Mexico', 'U.S. Spanish-Speaking')) %>%
      ggplot(aes(x = !!sym(var_name), color = treat, fill = treat)) +
      geom_histogram(aes(y=..density..,
                     weight = senate_weight),
                     position = 'dodge') +
  labs(x = '', y = '', title = title)
}

plot_cat('early_ed', 'A. Early Childhood Education') + 
  plot_cat('parent_ed_years', "B. Parent's Years of Education") + 
  scale_x_continuous(breaks = unique(pisa_mex_us$parent_ed_years)) +
  plot_cont('wealth', 'C. Wealth Composite')  +
  plot_cont('parent_isei', 'D. Parent\'s Occupation (ISEI)') + 
  plot_layout(nrow = 2, guides = 'collect')  & 
  theme(legend.position = 'bottom', text = element_text(size = 8))

```

\newpage

```{r mod-est-us, fig.cap = 'Moderators of the difference in PISA scores between U.S.-born children of return migrants in Mexico and children of Spanish-speaking immigrants in the U.S. All estimates come from OLS models that adjust for the controls specified in the data section, cluster standard errors at the school level, and incorporate sampling weights.'}

controls = 'mom_ed + dad_ed + female + early_ed + cultural_pos + home_ed + age + year'

mod_df_us <- bind_rows(
  rubin(filter(pisa_mex_us, female == 1), 'Girls', covariates = controls),
  rubin(filter(pisa_mex_us, female == 0), 'Boys', covariates = controls),
  rubin(filter(pisa_mex_us, non_urban == 1), 'Non-urban', covariates = controls),
  rubin(filter(pisa_mex_us, non_urban == 0), 'Urban', covariates = controls),
  rubin(filter(pisa_mex_us, village == 1), 'Village', covariates = controls),
  rubin(filter(pisa_mex_us, village == 0), 'Non-village', covariates = controls),
  rubin(filter(pisa_mex_us, age_arrival < 5 | treat == 0), 'Under 5', covariates = controls),
  rubin(filter(pisa_mex_us, age_arrival >= 5 | treat == 0), '5 and older', covariates = controls),
  rubin(filter(pisa_mex_us, age_arrival < 10 | treat == 0), 'Under 10', covariates = controls),
  rubin(filter(pisa_mex_us, age_arrival >= 10 | treat == 0), '10 and older', covariates = controls)
)


non_urban_df <- rubin_means(filter(pisa_mex_us, non_urban == 1), labels = c('Mexico', 'U.S.'))
urban_df <- rubin_means(filter(pisa_mex_us, non_urban == 0), labels = c('Mexico', 'U.S.'))

mod_df_us %>% 
  mutate(sample = factor(sample, levels = unique(mod_df_us$sample))) %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, 
                    ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = '', y = 'Difference in scores for children of return migrants'
       ) +
  theme(axis.text.x=element_text(angle=-15),
        legend.justification=c(0,0),
        legend.position=c(0.175,.02)) 
```

\newpage

```{r alt-mods-fig, results = 'hide', fig.height = 3.5, fig.cap = "Alternate estimates comparing children of return migrants in Mexico to children in Mexico and children of Spanish-speaking immigrants in the U.S. Error bars represent 95% asymptotic confidence intervals. Models adjust for parents' education, cultural possessions, home educational resources, age, ECEC, gender, and survey year. OLS is the baseline model, PSM uses propensity score matching, matches on Mahalanobis distance, Optimal Full implements the matching algorithm from Hansen and Klopfer (2006), and IPW incorporates (stabilized) inverse probability weights."}
controls <- "mom_ed + dad_ed + female + early_ed + cultural_pos + home_ed + age + year"

psm_mx <- MatchIt::matchit(treat ~ mom_ed + dad_ed + female + early_ed +
                           cultural_pos + home_ed + age + year,
                           data = pisa_mex,
                           method = 'nearest',
                           distance = 'glm',
                           s.weights = pisa_mex$weight) 
mahvars_mx <- MatchIt::matchit(treat ~ mom_ed + dad_ed + female + early_ed +
                           cultural_pos + home_ed + age + year,
                           data = pisa_mex,
                           method = 'nearest',
                           distance = 'mahalanobis',
                           s.weights = pisa_mex$weight) 
full_mx <- MatchIt::matchit(treat ~ mom_ed + dad_ed + female + early_ed +
                           cultural_pos + home_ed + age + year,
                           data = pisa_mex,
                           method = 'full',
                           s.weights = pisa_mex$weight) 
# IPW
pi.out = glm(treat ~ mom_ed + dad_ed + female + early_ed +
               cultural_pos + home_ed + age + year,
             data = pisa_mex,
             family="binomial"(link=logit))
ps = pi.out$fit
D = pisa_mex$treat
PrD = mean(D)
IPW_mex = (D*PrD+(1-D)*(1-PrD))/(D*ps+(1-D)*(1-ps))

          
alt_fig_mex <- bind_rows(
  rubin(pisa_mex, 'OLS', controls),
  rubin(match.data(psm_mx), 'PSM', weights = 'weights'),
  rubin(match.data(mahvars_mx), 'Mahalanobis', weights = 'weights'),
  rubin(match.data(full_mx), 'Optimal Full', weights = 'weights'),
  rubin(pisa_mex, 'IPW', weights = IPW_mex))

psm_us <- MatchIt::matchit(treat ~ mom_ed + dad_ed + female + early_ed +
                           cultural_pos + home_ed + age + year,
                           data = pisa_mex_us,
                           method = 'nearest',
                           distance = 'glm',
                           s.weights = pisa_mex_us$weight)
mahvars_us <- MatchIt::matchit(treat ~ mom_ed + dad_ed + female + early_ed +
                           cultural_pos + home_ed + age + year,
                           data = pisa_mex_us,
                           method = 'nearest',
                           distance = 'mahalanobis',
                           s.weights = pisa_mex_us$weight) 
full_us <- MatchIt::matchit(treat ~ mom_ed + dad_ed + female + early_ed +
                           cultural_pos + home_ed + age + year,
                           data = pisa_mex_us,
                           method = 'full',
                           s.weights = pisa_mex_us$weight) 
pi.out = glm(treat ~ mom_ed + dad_ed + female + early_ed +
               cultural_pos + home_ed + age + year,
             data = pisa_mex_us,
             family="binomial"(link=logit))
ps = pi.out$fit
D = pisa_mex_us$treat
PrD = mean(D)
IPW_us = (D*PrD+(1-D)*(1-PrD))/(D*ps+(1-D)*(1-ps))
alt_fig_us <- bind_rows(
  rubin(pisa_mex_us, 'OLS', controls),
  rubin(match.data(psm_us), 'PSM', weights = 'weights'),
  rubin(match.data(mahvars_us), 'Mahalanobis', weights = 'weights'),
  rubin(match.data(full_us), 'Optimal Full', weights = 'weights'),
  rubin(pisa_mex_us, 'IPW', weights = IPW_us))
alt_fig <- bind_rows(
    mutate(alt_fig_mex, country = 'Mexico'), 
    mutate(alt_fig_us, country = 'U.S.')) %>%
  mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science'),
           sample = factor(sample,
                           levels = c('OLS', 'PSM', 'Mahalanobis', 'Optimal Full', 'IPW')))

alt_fig %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  facet_wrap(~country) +
  labs(x = '', y = 'Estimated difference in PISA score') +
  theme(axis.text.x=element_text(angle=-15),
        legend.justification=c(1,0), 
        legend.position=c(1,.6))
```

